Tasks complete 6/6

Task 1

## Show the directory this report was knitted from
print(getwd())
## [1] "/Users/hunterdarr/Documents/RPACKAGES/FALL224753darr0006/LAB8"

Task 2

Make the sample

## Draw 10 values from a Uniform(0, 5) distribution and display them
labSample <- runif(n = 10, min = 0, max = 5)
print(labSample)
##  [1] 4.0019253 4.5725337 0.2698062 2.4463107 0.7350226 3.4738943 2.3056199
##  [8] 3.3662620 2.6770966 1.7441278

Calculate mean and variance

Mean = \((0 + 5)/2\) = 2.5; Variance = \(((5-0)^2)/12\) = 2.08333333333

Sample mean and variance

## Sample mean of the 10 uniform draws
mean(x = labSample)
## [1] 2.55926
## Sample variance (n - 1 denominator) of the same draws
var(x = labSample)
## [1] 1.883024

For the sample that I am currently looking at, the mean is 3.135563 and the variance is 2.212281.

These numbers are noticeably different from the theoretical values (mean 2.5, variance 2.0833). I believe this is because we have a small sample size.

find T and mean Y

The sample I am using is: [1] 1.7273554 4.0384836 2.4628431 4.7507547 1.8641004 0.3166395 4.2169844 3.2129434 [9] 4.7000881 4.0654404

T = the sum of Y’s = \(1.7273554 + 4.0384836 + 2.4628431 + 4.7507547 + 1.8641004 + 0.3166395 + 4.2169844 + 3.2129434 + 4.7000881 + 4.0654404\) = 31.35563

mean of Y = T/n = 31.35563/10 = 3.135563

Explaining the myclt() function.

Line A draws a random sample of size n*iter from a uniform distribution with a lower limit of 0 and an upper limit of 5.

Line B arranges the sample we drew in line A into a matrix with n rows and iter columns.

Line C sums each column of our matrix (one sum per iteration) and stores the resulting vector in sm.

Line D calls upon our function with n = 10 and iter = 10000.

myclt() call.

## Simulate `iter` samples of size `n` from Uniform(0, 5), draw a histogram of
## the per-sample totals, and return those totals as a numeric vector.
myclt <- function(n, iter) {
  y <- runif(n * iter, min = 0, max = 5) # A
  ## one column per iteration, one row per observation
  data <- matrix(y, nrow = n, ncol = iter, byrow = TRUE) # B
  ## the name `sm` is kept because hist() builds its default title from it
  sm <- apply(data, MARGIN = 2, FUN = sum) # C
  hist(sm)
  sm
}
w <- myclt(n = 10, iter = 10000) # D

Changed function

## Same simulation as myclt(), but each sample is summarised by its mean
## instead of its total. Draws a histogram of the sample means and returns them.
mycltChanged <- function(n, iter) {
  y <- runif(n * iter, min = 0, max = 5) # A
  ## one column per iteration, one row per observation
  data <- matrix(y, nrow = n, ncol = iter, byrow = TRUE) # B
  ## the name `sm` is kept because hist() builds its default title from it
  sm <- apply(data, MARGIN = 2, FUN = mean) # C
  hist(sm)
  sm
}
w <- mycltChanged(n = 10, iter = 10000) # D

Task 3

## Notice that I have assigned default values which can be changed when the function is called
## Plot the simulated sampling distribution of the mean of `n` Uniform(a, b)
## draws, based on `iter` repeated samples.
##   n    : sample size (rows of the simulation matrix)
##   iter : number of simulated samples (columns of the simulation matrix)
##   a, b : limits of the uniform distribution; the defaults can be overridden
## The plot overlays, in this order: a density-scaled histogram of the sample
## means, their kernel density estimate (blue), the normal curve with mean
## (a + b) / 2 and sd (b - a) / sqrt(12 * n) (red, dashed), and the
## Uniform(a, b) density the samples were drawn from (black).
mycltu <- function(n, iter, a = 0, b = 10) {
  ## n * iter uniform draws, arranged so that each column is one sample
  draws <- matrix(runif(n * iter, a, b), nrow = n, ncol = iter, byrow = TRUE)
  ## one sample mean per column (margin 2)
  sample_means <- apply(draws, MARGIN = 2, FUN = mean)

  ## Height of the tallest density bar, computed without plotting, plus 10%
  ## headroom so the bars do not touch the top of the plot
  y_top <- 1.1 * max(hist(sample_means, plot = FALSE)$density)

  plot_title <- paste0("Histogram of sample mean", "\n", "sample size= ", n)
  hist(sample_means, freq = FALSE, ylim = c(0, y_top),
       main = plot_title, xlab = "Sample mean")
  ## empirical density of the simulated means
  lines(density(sample_means), col = "Blue", lwd = 3)
  ## theoretical normal curve
  curve(dnorm(x, mean = (a + b) / 2, sd = (b - a) / (sqrt(12 * n))),
        add = TRUE, col = "Red", lty = 2, lwd = 3)
  ## density of the population the samples were taken from
  curve(dunif(x, a, b), add = TRUE, lwd = 4)
}
#mycltu(n=20,iter=100000)

how does the apply function use the 2?

The second argument specifies whether to apply the mean to the rows (1) or the columns (2). The 2 specifies that we will take the mean of the columns. This makes sense because the columns are our iterations. For example, if we have a sample size of 10 and 20 iterations, each iteration (column) will have 10 numbers. We want the mean of each iteration, so we take the mean over the columns (which is 2).

How many terms are in w, when mycltu(n=20,iter=100000) is called?

w will have 100000 terms. This is because the 2 makes apply compute one mean per column, each column is one iteration, and we have 100000 iterations.

We can prove this with the following R code:

## Reproduce the first steps of mycltu(n=20, iter=100000) by hand
yExample <- runif(20 * 100000, min = 0, max = 5)
## 20 rows (sample size) by 100000 columns (iterations)
dataExample <- matrix(yExample, nrow = 20, ncol = 100000, byrow = TRUE)
## one mean per column, collected in a vector
wExample <- apply(dataExample, MARGIN = 2, FUN = mean)

## number of terms in the vector of sample means
length(wExample)
## [1] 100000

curve(dnorm(x,mean=(a+b)/2,sd=(b-a)/(sqrt(12*n))),add=TRUE,col="Red",lty=2,lwd=3)

This plots the theoretical normal curve using dnorm.

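The standard deviation is (b-a)/(sqrt(12*n)) because ((b-a)^2)/12 is the variance of the uniform distribution. The variance of the mean of n independent draws is that variance divided by n, i.e. ((b-a)^2)/(12*n), and taking the square root gives the standard deviation. The n inside the square root therefore adjusts for our sample size.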

Record the plots using the following parameters and options

## Sample means of Uniform(0, 10) draws for increasing sample sizes
mycltu(n = 1, iter = 10000, a = 0, b = 10)

mycltu(n = 2, iter = 10000, a = 0, b = 10)

mycltu(n = 3, iter = 10000, a = 0, b = 10)

mycltu(n = 5, iter = 10000, a = 0, b = 10)

mycltu(n = 10, iter = 10000, a = 0, b = 10)

mycltu(n = 30, iter = 10000, a = 0, b = 10)

What do I conclude?

I can conclude that if a sample is large enough, the distribution of its mean will closely follow the theoretical normal curve. We can see that once n=10 the histogram looks nearly the same as for n=30. However, n=1 does not look the same and n=2, while more similar than n=1, does not look similar to n=30.

Task 4

## Plot the simulated sampling distribution of the mean of `n` binomial draws,
## each drawn with size = n and prob = p, based on `iter` repeated samples.
##   n    : sample size, also used as the binomial `size`
##   iter : number of simulated samples (columns of the simulation matrix)
##   p    : binomial success probability
##   ...  : further arguments forwarded to hist()
## A normal curve with mean n * p and sd sqrt(p * (1 - p)) is overlaid
## (red, dashed) on the density-scaled histogram of the sample means.
mycltb <- function(n, iter, p = 0.5, ...) {
  ## n * iter binomial counts, arranged so that each column is one sample
  draws <- matrix(rbinom(n * iter, size = n, prob = p),
                  nrow = n, ncol = iter, byrow = TRUE)
  ## one sample mean per column (margin 2)
  sample_means <- apply(draws, MARGIN = 2, FUN = mean)

  ## Height of the tallest density bar, computed without plotting, plus 10%
  ## headroom for the y axis
  y_top <- 1.1 * max(hist(sample_means, plot = FALSE)$density)

  ## freq = FALSE puts the histogram on the density scale
  hist(sample_means, freq = FALSE, ylim = c(0, y_top),
       main = paste0("Histogram of sample mean", "\n", "sample size= ", n),
       xlab = "Sample mean", ...)
  ## optional empirical density overlay, left disabled:
  # lines(density(sample_means), col = "Blue", lwd = 3)
  ## theoretical normal curve
  curve(dnorm(x, mean = n * p, sd = sqrt(p * (1 - p))),
        add = TRUE, col = "Red", lty = 2, lwd = 3)
}
#mycltb(n=5,iter=10000,p=0.5)

Make graphs for the following parameters and options

## One plot per sample size, success probability fixed at 0.3
for (sample_size in c(4, 5, 10, 20)) {
  mycltb(n = sample_size, iter = 10000, p = 0.3)
}

Do the same, except use p=0.7

## One plot per sample size, success probability fixed at 0.7
for (sample_size in c(4, 5, 10, 20)) {
  mycltb(n = sample_size, iter = 10000, p = 0.7)
}

Do the same again this time with p=0.5

## One plot per sample size, success probability fixed at 0.5
for (sample_size in c(4, 5, 10, 20)) {
  mycltb(n = sample_size, iter = 10000, p = 0.5)
}

What do you conclude?

My conclusion is similar to task 3 in the sense that if you have a large enough sample, it will accurately represent a larger sample.

Task 5

## Plot the simulated sampling distribution of the mean of `n` Poisson(lambda)
## draws, based on `iter` repeated samples, on a three-panel page:
##   top          : density-scaled histogram of the sample means with the
##                  normal curve (mean lambda, sd sqrt(lambda / n)) overlaid
##   bottom left  : barplot of the relative frequencies of the sampled values
##   bottom right : Poisson probability function on 0:max(y)
##   n      : sample size (rows of the simulation matrix)
##   iter   : number of simulated samples (columns of the simulation matrix)
##   lambda : Poisson mean
##   ...    : further arguments forwarded to hist()
## The graphical parameters in effect before the call are restored on exit, so
## the three-panel layout does not leak into later plots on the same device.
mycltp <- function(n, iter, lambda = 10, ...) {

  ## r-random sample from the Poisson
  y <- rpois(n * iter, lambda = lambda)
  ## Place these numbers into a matrix
  ## The columns correspond to the iteration and the rows to the sample size n
  data <- matrix(y, nrow = n, ncol = iter, byrow = TRUE)
  ## apply the function mean to the columns (2) of the matrix
  ## these are placed in a vector w
  w <- apply(data, 2, mean)
  ## All the values used to make a histogram are placed in param
  ## (nothing is plotted yet)
  param <- hist(w, plot = FALSE)
  ## Since the histogram will be a density plot we find the max density
  ## and add 10% to be on the safe side
  ymax <- 1.1 * max(param$density)

  ## Save the current graphical parameters and restore them when the function
  ## exits (normally or by error); layout() would otherwise stay in effect
  old_par <- par(no.readonly = TRUE)
  on.exit(par(old_par), add = TRUE)

  ## Make a suitable layout for graphing: one wide panel on top, two below
  layout(matrix(c(1, 1, 2, 3), nrow = 2, ncol = 2, byrow = TRUE))

  ## Now we can make the histogram
  hist(w, freq = FALSE, ylim = c(0, ymax), col = rainbow(max(w)),
       main = paste0("Histogram of sample mean", "\n", "sample size= ", n,
                     " iter=", iter, " lambda=", lambda),
       xlab = "Sample mean", ...)
  ## optional empirical density overlay, left disabled:
  # lines(density(w), col = "Blue", lwd = 3)
  ## Add a theoretical normal curve
  curve(dnorm(x, mean = lambda, sd = sqrt(lambda / n)),
        add = TRUE, col = "Red", lty = 2, lwd = 3)

  # Now make a new plot
  # Since y is discrete we should use a barplot
  barplot(table(y) / (n * iter), col = rainbow(max(y)),
          main = "Barplot of sampled y", ylab = "Rel. Freq", xlab = "y")
  x <- 0:max(y)
  plot(x, dpois(x, lambda = lambda), type = "h", lwd = 5,
       col = rainbow(max(y)),
       main = "Probability function for Poisson",
       ylab = "Probability", xlab = "y")
}
#mycltp(n=10,iter=10000)

Make graphs for the following parameters and options

## One three-panel page per sample size, lambda fixed at 4
for (sample_size in c(3, 5, 10, 20)) {
  mycltp(n = sample_size, iter = 10000, lambda = 4)
}

The same for lambda = 10

## One three-panel page per sample size, lambda fixed at 10
for (sample_size in c(3, 5, 10, 20)) {
  mycltp(n = sample_size, iter = 10000, lambda = 10)
}

Task 6

Pick one of the above functions and add it to your package

I picked the mycltu function.

## Call the packaged copy of mycltu(); the result is assigned to a variable
dontShowOutput <- FALL224753darr0006::mycltu(n = 54, iter = 100000)